## Efrat 2010 IO

library(foreign) 
library(Amelia)

## Load original dataset
## Reads the Stata replication file (path is relative to the working
## directory), then echoes the first rows and the dimensions as a quick check.
e2010 <- read.dta("E2010 IO Rep Data.dta")
head(e2010)
dim(e2010)

## Remove extraneous data
## Keep only the first 118 rows. head() never indexes past the end of the
## data, so a shorter file cannot pick up all-NA padding rows the way
## e2010[1:118, ] would.
e2010 <- head(e2010, n = 118L)

## Extract numbers from string variables
## The columns listed below are converted to numeric in place.
## A factor column is converted through its labels: calling as.numeric()
## directly on a factor returns the internal level codes, not the numbers
## the labels spell out. Non-factor columns are converted exactly as before.
## Labels that are not numbers become NA (with a warning).
string_coded_vars <- c(
  "scope", "level", "obligation", "followup", "importance", "transparency",
  "civilianpo", "nsa", "unsc", "hr", "marking"
)
e2010[string_coded_vars] <- lapply(
  e2010[string_coded_vars],
  function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  }
)

## How many variables? 32: no reduction necessary
dim(e2010)

## Imputation
## Append a running row index as the `case` column. seq_len() gives an empty
## index for a zero-row data frame, whereas 1:nrow() would count down c(1, 0).
case <- seq_len(nrow(e2010))
e2010 <- cbind(e2010, case)
head(e2010)

## What is the average percentage of missing data per variable?
## Count the missing values in each column of `x`.
##
## x: a data frame or matrix.
## Returns an unnamed integer vector holding one NA count per column, in
## column order.
NAs <- function(x) {
  ## is.na() on a data frame or matrix yields a logical matrix, so colSums()
  ## counts the NAs per column directly, without first coercing every column
  ## to a common type the way apply() does on a data frame.
  ## as.integer() drops the column names and keeps the counts integer-typed.
  as.integer(colSums(is.na(x)))
}
## Per-column NA counts, computed once and reused below.
na_counts <- NAs(e2010)
na_counts
## Mean share of missing values across columns, as a percentage.
100 * mean(na_counts / nrow(e2010))

## Thus: 5 imputations

## Fix the RNG state before imputing (R reads the literal 02138 as 2138).
set.seed(02138)

## empri is set to 1% of the number of rows.
## NOTE(review): `case` is not declared as an ID variable in this call, so it
## presumably enters the imputation model like any other column -- confirm
## that this is intended.
empri_value <- 0.01 * nrow(e2010)
e2010.out <- amelia(
  e2010,
  m = 5,
  cs = "country",
  empri = empri_value
)

## Export the imputations in Stata format; separate = FALSE requests one
## combined output rather than one file per imputation (see the Amelia docs).
write.amelia(
  obj = e2010.out,
  file.stem = "E2010 IO Imp Data",
  format = "dta",
  separate = FALSE
)